#############################################
#Comparison of volunteer behavior in original and alternate designs
#############################################

#Clear all (non-hidden) objects from the global environment before running
#NOTE(review): this also wipes anything the caller had loaded - confirm intended
rm(list = ls())

#Functions
#Project helpers; presumably defines installPackageNotFound and formatSig,
#both of which this script calls without defining - confirm
source("functions.R")

#Packages
#NOTE(review): installPackageNotFound presumably installs a missing package
#and attaches it; felm() is called later without library(lfe), so lfe must
#end up attached by this call - confirm
installPackageNotFound("xtable")
installPackageNotFound("lfe")

#############################################
#Read in Voter data
#############################################

#All voters (keep strings as character vectors rather than factors)
all.voters <- read.csv("voters.csv", stringsAsFactors = FALSE)

#Drop voter pairs in same household as volunteer
#which() keeps only rows where the comparison is TRUE, so rows with a
#missing same_household value are dropped as well
all.voters <- all.voters[which(all.voters$same_household == 0), ]

#############################################
#Difference in response rates between regional and hyper-local volunteers?
#############################################
#Flag whether any update submitted: 1 when total_updates is non-missing,
#0 otherwise
all.voters$update.submitted <- as.numeric(!is.na(all.voters$total_updates))

#Cross-tabulate update submission (rows) by volunteer group (columns) among
#treated voters. Built once so the proportions and the test share one table.
treated.rows <- which(all.voters$Treatment == 1)
response.table <- table(all.voters$update.submitted[treated.rows],
                        all.voters$Volunteer_In_District[treated.rows])

#Response rates by volunteer group (proportions within each column)
#Printed explicitly so the output also appears when the script is source()'d
print(prop.table(response.table, 2))

#Test for difference
print(chisq.test(response.table))

#############################################
#Statistical tests for methods of contact
#############################################
#Restrict the sample used for the tests to treated voters with any update
in.test.sample <- all.voters$Treatment == 1 &
  all.voters$update.submitted == 1
all.voters <- all.voters[which(in.test.sample), ]

#Form fields (one *_Binary column per contact method)
form.fields <- c(
  "Postcard_Binary",
  "Social_Media_Binary",
  "Email_Binary",
  "Phone_Binary",
  "In_Person_Binary",
  "Text_Binary",
  "Other_Binary"
)

#Variables to test: the form fields followed by the remaining measures
test.vars <- c(
  form.fields,
  "Method_Count",
  "days_elapsed",
  "total_updates",
  "End_Status_In_Progress",
  "End_Status_Success"
)

#One result row per test variable, preallocated and bound together once after
#the loop instead of growing the object with rbind() on every iteration
result.rows <- vector("list", length(test.vars))

#Test each variable for a difference by Volunteer_In_District
for(i in seq_along(test.vars)){
    this.var <- test.vars[i]

    #Run felm; formula parts are outcome ~ covariates | fixed effects | IVs |
    #clusters, so this has no fixed effects or instruments and clusters the
    #standard errors on volunteer_id
    this.test <- summary(felm(formula(paste0(this.var,"~Volunteer_In_District",
                                             "|0|0|volunteer_id")),
                              data = all.voters), robust = TRUE)

    #Copy coefficients
    coef.out <- this.test$coefficients

    #Save intercept estimate
    this.intercept <- coef.out["(Intercept)", "Estimate"]

    #Row layout: variable name, N, the Volunteer_In_District coefficient row,
    #then the intercept. c() coerces every value to character; the numeric
    #columns are converted back after the loop.
    result.rows[[i]] <- c(this.var, this.test$N,
                          coef.out[c("Volunteer_In_District"),],
                          this.intercept)
}

#Stack the rows into a matrix (NULL when test.vars is empty)
result.df <- do.call(rbind, result.rows)

#Convert to df
#Columns that had no name in the matrix are auto-named by position:
#V1 (variable name), V2 (N) and V7 (intercept estimate)
result.df <- data.frame(result.df, stringsAsFactors = FALSE)

#Convert to numeric (the values were stored as character in the matrix)
numeric.cols <- c("Estimate", "Cluster.s.e.", "t.value", "Pr...t..", "V7")
result.df[numeric.cols] <- lapply(result.df[numeric.cols], as.numeric)

#Adjust p-values for multiple tests ("fdr" is the Benjamini-Hochberg method)
result.df$Pr...t.. <- p.adjust(result.df$Pr...t.., method = "fdr")

#Calculate group averages from the regression coefficients:
#the intercept is the average where Volunteer_In_District is 0, and
#intercept + coefficient is the average where it is 1
#(assumes Volunteer_In_District is a 0/1 indicator - confirm)
result.df$regional.district <- result.df$V7
result.df$home.district <- result.df$Estimate + result.df$V7

#Copy the columns needed to build the table
formatted.df <- result.df[,c("V1", "home.district",
                             "regional.district", "Pr...t..")]

#Format coef with formatSig(x, 2), applied to each column as a whole
mean.cols <- c("home.district", "regional.district")
formatted.df[mean.cols] <- lapply(formatted.df[mean.cols], formatSig, 2)

#Add stars to the regional.district column based on the adjusted p-values.
#Thresholds run from loosest to strictest so that a stricter threshold
#overwrites the stars written by a looser one.
star.cutoffs <- c("*" = 0.05, "**" = 0.01, "***" = 0.001)
for(stars in names(star.cutoffs)){
    sig.rows <- which(formatted.df[,"Pr...t.."] < star.cutoffs[[stars]])

    #Nothing to relabel when no row passes this threshold
    if(length(sig.rows) > 0){
        formatted.df[sig.rows,"regional.district"] <-
          paste0(formatSig(result.df[sig.rows,"regional.district"],2), stars)
    }
}

#Keep only needed columns
formatted.df <- formatted.df[,c("V1", "home.district", "regional.district")]

#Nice labels for table, keyed by variable name so that each label follows its
#variable even if the list of test variables is reordered or extended
var.labels <- c(Postcard_Binary = "Postcard",
                Social_Media_Binary = "Social Media",
                Email_Binary = "Email",
                Phone_Binary = "Phone",
                In_Person_Binary = "In Person",
                Text_Binary = "Text",
                Other_Binary = "Other Method",
                Method_Count = "Method Count",
                days_elapsed = "Days Until First Status Update",
                total_updates = "Total Number of Status Updates",
                End_Status_In_Progress = "Last Status In Progress",
                End_Status_Success = "Last Status Success")
var.names <- as.character(formatted.df$V1)
nice.labels <- unname(var.labels[var.names])

#Fall back to the raw variable name where no label is defined
no.label <- is.na(nice.labels)
nice.labels[no.label] <- var.names[no.label]
formatted.df$V1 <- nice.labels

#Columns are renamed by position: V1 -> Covariate,
#home.district -> Original, regional.district -> Alternate
names(formatted.df) <- c("Covariate", "Original", "Alternate")
print(formatted.df)
